# ------------------------------------------------------------------------------
# NOTE:
# This script fits the Wordfish models to estimate parties' committee-specific
# positions. However, due to changes in the quanteda and quanteda.textmodels
# packages over time, this script may not run successfully with current package
# versions. As explained in the README, the Wordfish estimation was originally
# conducted using versions of quanteda and related packages from around
# April 2021.
# ------------------------------------------------------------------------------

require(stringr)
require(dplyr)
require(tidyr)
require(quanteda)
require(quanteda.textmodels)
require(Rcpp)
require(RcppArmadillo)

# If you use macOS, install an appropriate gfortran binary before running this script.
# Mojave (10.14)–Catalina (10.15): gfortran 8.2
# https://github.com/fxcoudert/gfortran-for-macOS/releases/download/8.2/gfortran-8.2-Mojave.dmg
# Sierra (10.12)–High Sierra (10.13): gfortran 6.3
# https://github.com/fxcoudert/gfortran-for-macOS/releases/download/6.3/gfortran-6.3-Sierra.dmg

# parametric bootstrap for wordfish estimates
committee.names <- c("内閣委員会", "総務委員会", "法務委員会", "外交防衛委員会", 
                     "財務金融委員会", "文部科学委員会", "厚生労働委員会", 
                     "農林水産委員会", "経済産業委員会", "国土交通委員会", 
                     "交通・情報通信・国土・環境委員会", "環境委員会")
party.names <- c()
for (i in 1:61) {
  positions <- readRDS(paste0("Wordfish/positions_", 1958 + i, ".rds"))
  party.names <- unique(c(party.names, rownames(positions)))
}
party.names <- sort(party.names)

sourceCpp("1-a_wordfish_dense_modified.cpp")
iter <- 100
for (i in 1:61) {  # i indexes years
  positions <- readRDS(paste0("Wordfish/positions_", 1958 + i, ".rds"))
  positions[is.nan(positions)] <- NA
  # drop committees with all-missing party positions in this year
  delete <- apply(positions, MARGIN = 2, FUN = function(x){return(sum(is.na(x)))})
  positions <- positions[, delete != nrow(positions)]
  wordfish.list <- readRDS(paste0("Wordfish/Wordfish_results_", 1958 + i, ".rds"))
  wordfish.list <- wordfish.list[delete != nrow(positions)]
  committee.result <- array(NA, dim = c(length(party.names), length(committee.names), iter))
  dimnames(committee.result) <- list(party.names, committee.names, 1:100)
  for (j in 1:length(wordfish.list)) {  # j indexes committees
    committee.wordfish.result <- wordfish.list[[j]]
    n <- length(committee.wordfish.result$theta)
    m <- length(committee.wordfish.result$beta)
    alpha <- committee.wordfish.result$alpha
    psi <- committee.wordfish.result$psi
    beta <- committee.wordfish.result$beta
    theta <- committee.wordfish.result$theta
    dir <- as.integer(committee.wordfish.result$dir)
    priors <- 1 / (committee.wordfish.result$priors ^ 2)
    raw.simulation.data <- matrix(NA, n, m)
    simulation.result <- matrix(NA, iter, n)
    for (k in 1:iter) {  # repeat bootstrap iterations
      # draw a parametric bootstrap sample
      set.seed(100000 * i + 1000 * j + k)
      for (l in 1:n) {
        raw.simulation.data[l, ] <- rpois(m, exp(alpha[l] + psi + beta * theta[l]))
      }
      # drop terms with zero counts to avoid estimation errors
      empty.feats <- which(colSums(raw.simulation.data) == 0)
      if (length(empty.feats) > 0) {
        simulation.data <- raw.simulation.data[, -1 * empty.feats]
        init.psi <- psi[-1 * empty.feats]
        init.beta <- beta[-1 * empty.feats]
      } else {
        simulation.data <- raw.simulation.data
        init.psi <- psi
        init.beta <- beta
      }
      init.alpha <- alpha
      init.theta <- theta
      # re-estimate the model using bootstrap samples, with original estimates as initial values
      bootstrap.result <- qatd_cpp_wordfish_dense_boot(wfm = simulation.data, 
                                                       dir = dir, priors = priors,
                                                       tol = c(1e-06, 1e-08), abs_err = FALSE, 
                                                       init_alpha = init.alpha, init_psi = init.psi, 
                                                       init_beta = init.beta, init_theta = init.theta)
      simulation.result[k, ] <- bootstrap.result$theta.boot
    }
    party.point <- grep(dimnames(committee.result)[[1]], 
                        pattern = paste(names(na.omit(positions[,j])), collapse = "|"))
    committee.point <- charmatch(dimnames(committee.result)[[2]], x = colnames(positions)[j])
    for (l in 1:length(party.point)) {
      committee.result[party.point[l], committee.point, ] <- simulation.result[,l]
    }
  }
  # save bootstrap results
  saveRDS(committee.result, file = paste0("Wordfish_bootstrap/positions_", 1958 + i, "_boot.rds"))
  print(paste0("completed processing for", 1958 + i))
}
